{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-06-19T22:07:28.383531Z",
     "start_time": "2017-06-19T22:07:27.981102Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "# Limit DataFrame previews to 15 rows so cell outputs stay compact.\n",
    "pd.set_option(\"display.max_rows\",15)\n",
    "# Render matplotlib figures inline in the notebook output.\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-06-19T22:07:28.593348Z",
     "start_time": "2017-06-19T22:07:28.384973Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class dataset:\n",
    "    \"\"\"Namespace for the pickled KDD train/test objects, read once when the class body runs.\n",
    "\n",
    "    Every ``*_2labels`` / ``*_5labels`` attribute has a ``*_y`` companion loaded from a\n",
    "    sibling pickle file. Unpickling can execute arbitrary code, so only trusted files\n",
    "    should be placed under ``dataset/``.\n",
    "    \"\"\"\n",
    "\n",
    "    # Two-label variant: training pair, then test pair.\n",
    "    kdd_train_2labels, kdd_train_2labels_y = (\n",
    "        pd.read_pickle(\"dataset/kdd_train_2labels.pkl\"),\n",
    "        pd.read_pickle(\"dataset/kdd_train_2labels_y.pkl\"),\n",
    "    )\n",
    "    kdd_test_2labels, kdd_test_2labels_y = (\n",
    "        pd.read_pickle(\"dataset/kdd_test_2labels.pkl\"),\n",
    "        pd.read_pickle(\"dataset/kdd_test_2labels_y.pkl\"),\n",
    "    )\n",
    "\n",
    "    # Five-label variant: training pair, then test pair.\n",
    "    kdd_train_5labels, kdd_train_5labels_y = (\n",
    "        pd.read_pickle(\"dataset/kdd_train_5labels.pkl\"),\n",
    "        pd.read_pickle(\"dataset/kdd_train_5labels_y.pkl\"),\n",
    "    )\n",
    "    kdd_test_5labels, kdd_test_5labels_y = (\n",
    "        pd.read_pickle(\"dataset/kdd_test_5labels.pkl\"),\n",
    "        pd.read_pickle(\"dataset/kdd_test_5labels_y.pkl\"),\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-06-19T22:07:28.927883Z",
     "start_time": "2017-06-19T22:07:28.595030Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import collections\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.neural_network import MLPClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n",
    "from sklearn.svm import NuSVC, SVC\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.model_selection import cross_val_predict\n",
    "from sklearn.model_selection import ShuffleSplit\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn import preprocessing\n",
    "from sklearn import metrics\n",
    "import time \n",
    "from sklearn.decomposition import SparsePCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-06-19T22:07:28.972103Z",
     "start_time": "2017-06-19T22:07:28.929335Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Features: every column except the last two. Targets: the separately pickled *_y objects.\n",
    "x_train = dataset.kdd_train_2labels.iloc[:, :-2]\n",
    "y_train = dataset.kdd_train_2labels_y\n",
    "\n",
    "x_test = dataset.kdd_test_2labels.iloc[:, :-2]\n",
    "y_test = dataset.kdd_test_2labels_y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-06-19T22:07:32.440401Z",
     "start_time": "2017-06-19T22:07:28.973664Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 2-component sparse PCA of the training features, used only for visualisation.\n",
    "# random_state is pinned so the projection is reproducible across kernel restarts.\n",
    "pca_analysis = SparsePCA(n_components=2, random_state=0)\n",
    "x_train_analysis = pca_analysis.fit_transform(x_train)\n",
    "\n",
    "# Second-to-last column of the training frame; compared against 0 and 1 when plotting.\n",
    "y_train_analysis = dataset.kdd_train_2labels.iloc[:,-2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-06-19T22:07:34.226624Z",
     "start_time": "2017-06-19T22:07:32.443112Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.legend.Legend at 0x7f13dbdf3cf8>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFktJREFUeJzt3X+QVOWd7/H3l2EirKuiyLrKyIWtMm4sfm4mGn+krsTr\nAiZ7MZvVQo2I2Uis+CNxjUayFcrLVmqT0pL110ZJUGtzDegiIhgjN4j3rpZxw7AQEJUsKspg9op4\nJTF3vAI+949uxp5xhume6emZfny/qrpmznOe6fOZbvhwOH26T6SUkCTlZchAB5AkVZ/lLkkZstwl\nKUOWuyRlyHKXpAxZ7pKUIctdkjJkuUtShix3ScrQ0IHa8NFHH53Gjh07UJuXpLq0fv36N1NKo3qa\nN2DlPnbsWFpaWgZq85JUlyLi1XLmeVhGkjJkuUtShix3ScrQgB1z78revXtpbW3l3XffHegog86w\nYcNoamqisbFxoKNIqgODqtxbW1s57LDDGDt2LBEx0HEGjZQSu3fvprW1lXHjxg10HEl1YFAdlnn3\n3XcZOXKkxd5JRDBy5Ej/RyPVm00PwsLxcOOIwtdND9Zs04Nqzx2w2Lvh4yLVmU0PwqqrYW9bYXnP\njsIywMTz+33zg2rPXZKy8cSCD4r9gL1thfEasNw7iQiuvfba9uWbb76ZG2+8saYZ5syZw7Jly2q6\nTUlVtqe1svEqs9w7OeSQQ1i+fDlvvvlmr35+3759VU4kqS4d0VTZeJUNumPulVixYSc3rd7K62+3\ncdyI4Vw37UTOnTK6T/c5dOhQ5s6dy8KFC/nud7/bYd327dv58pe/zJtvvsmoUaO49957GTNmDHPm\nzGHYsGFs2LCB008/ncMPP5xXXnmFl19+mddee42FCxfy7LPP8rOf/YzRo0ezatUqGhsbWbBgAatW\nraKtrY3TTjuNu+++22PrUi7Omt/xmDtA4/DCeA3U7Z77ig07mbd8MzvfbiMBO99uY97yzazYsLPP\n933FFVdw//33s2fPng7jV111FZdccgmbNm3ioosu4uqrr25f19rayjPPPMMtt9wCwEsvvcTatWtZ\nuXIlX/rSl5g6dSqbN29m+PDh/PSnPwXgyiuvZN26dTz33HO0tbXx6KOP9jm7pEFi4vnwF7fBEccD\nUfj6F7fV5MVUqONyv2n1Vtr27u8w1rZ3Pzet3trn+z788MOZPXs2t912W4fxX/ziF1x44YUAXHzx\nxTz99NPt68477zwaGhral2fMmEFjYyMTJkxg//79TJ8+HYAJEyawfft2AJ588klOOeUUJkyYwNq1\na9myZUufs0saRCaeD9c8Bze+Xfhao2KHOi73199uq2i8Ut/4xjdYvHgxv//978uaf+ihh3ZYPuSQ\nQwAYMmQIjY2N7YdbhgwZwr59+3j33Xf52te+xrJly9i8eTOXXXaZ57FLqpq6LffjRgyvaLxSRx11\nFOeffz6LFy9uHzvttNNYunQpAPfffz+f+cxnen3/B4r86KOP5p133vHsGElVVbflft20Exne2NBh\nbHhjA9dNO7Fq27j22ms7nDVz++23c++99zJx4kR+/OMfc+utt/b6vkeMGMFll13G+PHjmTZtGp/6\n1KeqEVmSAIiU0oBsuLm5OXW+WMcLL7zAJz7xibLvoz/OlhnMKn18JOUnItanlJp7mlfXp0KeO2V0\n1mUuSb1Vt4dlJEnds9wlKUOWuyRlyHKXpAxZ7pKUIcu9CytWrCAiePHFF4HCB4b95Cc/aV+/ceNG\nHnvssV7f/9ixY3v9qZOSVA7LvQtLlizhjDPOYMmSJUD1y12S+lt9l3s/XJ/wnXfe4emnn2bx4sXt\nHzVwww038NRTTzF58mS+//3vM3/+fB544AEmT57MAw88wC9/+UtOPfVUpkyZwmmnncbWrYUPL9u/\nfz/f/OY3GT9+PBMnTuT222/vsK22tj
ZmzJjBD3/4wz7nlqRS9fsmpn66PuEjjzzC9OnT+fjHP87I\nkSNZv3493/ve97j55pvbP5L3mGOOoaWlhTvuuAOA3/72tzz11FMMHTqUNWvW8O1vf5uHHnqIRYsW\nsX37djZu3MjQoUN566232rfzzjvvMGvWLGbPns3s2bN7nVeSulK/e+79dH3CJUuWMGvWLABmzZrV\nfmjmYPbs2cN5553H+PHjueaaa9o/unfNmjV89atfZejQwr+hRx11VPvPzJw5k0svvdRil9Qv6nfP\nvR+uT/jWW2+xdu1aNm/eTESwf/9+IoLPfe5zB/2573znO0ydOpWHH36Y7du3c+aZZ/a4rdNPP53H\nH3+cCy+80KsvSaq6+t1z74frEy5btoyLL76YV199le3bt7Njxw7GjRvHkCFD+N3vftc+77DDDuuw\nvGfPHkaPLnzGzX333dc+fvbZZ3P33Xe3X1e19LDMggULOPLII7niiit6nVeSulNWuUfE9IjYGhHb\nIuKGLtYfERGrIuJXEbElIi6tftROzppfuB5hqT5en3DJkiV84Qtf6DD2xS9+kaVLl9LQ0MCkSZNY\nuHAhU6dO5fnnn29/QfX6669n3rx5TJkypcMFsr/yla8wZswYJk6cyKRJkzqccQNw66230tbWxvXX\nX9/rzJLUlR4/8jciGoBfA2cDrcA64IKU0vMlc74NHJFS+lZEjAK2An+cUnqvu/utxkf+sunBwjH2\nPa2FPfaz5tf0Mla15kf+SqrmR/6eDGxLKb1cvOOlwEzg+ZI5CTgsCgeP/xB4C9jX+Y6qbuL5WZe5\nJPVWOYdlRgM7SpZbi2Ol7gA+AbwObAa+nlJ6v/MdRcTciGiJiJZdu3b1MrIkqSfVekF1GrAROA6Y\nDNwREYd3npRSWpRSak4pNY8aNarLOxqoK0MNdj4ukipRTrnvBI4vWW4qjpW6FFieCrYBrwB/WmmY\nYcOGsXv3bousk5QSu3fvZtiwYQMdRVKdKOeY+zrghIgYR6HUZwEXdprzGnAW8FREHAOcCLxcaZim\npiZaW1vxkM2HDRs2jKam3p/mKemjpcdyTynti4grgdVAA3BPSmlLRFxeXH8X8HfAfRGxGQjgWyml\nij/2sLGxkXHjxlX6Y5KkTsp6h2pK6THgsU5jd5V8/zrw59WNJknqrfp9h6okqVuWuyRlyHKXpAxZ\n7pKUIctdkjJkuUtShix3ScqQ5S5JGbLcJSlDlrskZchyl6QMWe6SlCHLXZIyZLlLUoYsd0nKkOUu\nSRmy3CUpQ5a7JGXIcpekDFnukpQhy12SMmS5S1KGLHdJypDlLkkZstwlKUOWuyRlyHKXpAxZ7pKU\nIctdkjJkuUtShix3ScqQ5S5JGbLcJSlDZZV7REyPiK0RsS0ibuhmzpkRsTEitkTE/6puTElSJYb2\nNCEiGoA7gbOBVmBdRKxMKT1fMmcE8I/A9JTSaxHxR/0VWJLUs3L23E8GtqWUXk4pvQcsBWZ2mnMh\nsDyl9BpASumN6saUJFWinHIfDewoWW4tjpX6OHBkRPzPiFgfEbOrFVCSVLkeD8tUcD+fBM4ChgO/\niIhnU0q/Lp0UEXOBuQBjxoyp0qYlSZ2Vs+e+Ezi+ZLmpOFaqFVidUvp9SulN4F+ASZ3vKKW0KKXU\nnFJqHjVqVG8zS5J6UE65rwNOiIhxEfExYBawstOcR4AzImJoRPwBcArwQnWjSpLK1eNhmZTSvoi4\nElgNNAD3pJS2RMTlxfV3pZReiIjHgU3A+8CPUkrP9WdwSVL3IqU0IBtubm5OLS0tA7JtSapXEbE+\npdTc0zzfoSpJGbLcJSlDlrskZchyl6QMWe6SlCHLXZIyZLlLUoYsd0nKkOUuSRmy3CUpQ5a7JGXI\ncpekDFnukpQhy12SMmS5S1KGLHdJypDlLkkZstwlKUOWuyRlyHKXpAxZ7pKUIctdkjJkuUtShix3\nSc
qQ5S5JGbLcJSlDlrskZchyl6QMWe6SlCHLXZIyZLlLUoYsd0nKkOUuSRkqq9wjYnpEbI2IbRFx\nw0HmfSoi9kXEX1UvoiSpUj2We0Q0AHcCM4CTgAsi4qRu5n0f+B/VDilJqkw5e+4nA9tSSi+nlN4D\nlgIzu5h3FfAQ8EYV80mSeqGcch8N7ChZbi2OtYuI0cAXgB9UL5okqbeq9YLqPwDfSim9f7BJETE3\nIloiomXXrl1V2rQkqbOhZczZCRxfstxUHCvVDCyNCICjgXMiYl9KaUXppJTSImARQHNzc+ptaEnS\nwZVT7uuAEyJiHIVSnwVcWDohpTTuwPcRcR/waOdilyTVTo/lnlLaFxFXAquBBuCelNKWiLi8uP6u\nfs4oSapQOXvupJQeAx7rNNZlqaeU5vQ9liSpL3yHqiRlyHKXpAxZ7pKUIctdkjJkuUtShix3ScqQ\n5S5JGbLcJSlDlrskZchyl6QMWe6SlCHLXZIyZLlLUoYsd0nKkOUuSRmy3CUpQ5a7JGXIcpekDJV1\nmT1JUi88+jew/j5I+yEa4JNz4PO31GTTlrsk9YdH/wZaFn+wnPZ/sFyDgvewjCT1h/X3VTZeZZa7\nJPWHtL+y8Sqz3CWpP0RDZeNVZrlLUn/45JzKxqvMF1QlqT8ceNHUs2UkKTOfv6VmZd6Zh2UkKUOW\nuyRlyHKXpAxZ7pKUIctdkjJkuUtShix3ScqQ5S5JGSqr3CNiekRsjYhtEXFDF+sviohNEbE5Ip6J\niEnVjypJKleP5R4RDcCdwAzgJOCCiDip07RXgP+cUpoA/B2wqNpBJUnlK2fP/WRgW0rp5ZTSe8BS\nYGbphJTSMyml/1NcfBZoqm5MSVIlyin30cCOkuXW4lh3/hr4WV9CSZL6pqofHBYRUymU+xndrJ8L\nzAUYM2ZMNTctSSpRzp77TuD4kuWm4lgHETER+BEwM6W0u6s7SiktSik1p5SaR40a1Zu8kqQylFPu\n64ATImJcRHwMmAWsLJ0QEWOA5cDFKaVfVz+mJKkSPR6WSSnti4grgdVAA3BPSmlLRFxeXH8XMB8Y\nCfxjRADsSyk1919sSdLBREppQDbc3NycWlpaBmTbklSvImJ9OTvPvkNVkjJkuUtShix3ScqQ5S5J\nGbLcJSlDlrskZchyl6QMWe6SlCHLXZIyZLlLUoYsd0nKkOUuSRmy3CUpQ5a7JGXIcpekDFnukpQh\ny12SMmS5S1KGLHdJypDlLkkZstwlKUOWuyRlyHKXpAxZ7pKUIctdkjJkuUtShix3ScqQ5S5JGbLc\nJSlDlrskZchyl6QMWe6SlCHLXZIyNLScSRExHbgVaAB+lFL6Xqf1UVx/DvB/gTkppX+rclZJ6lcr\nNuzkptVbef3tNo4bMZzrpp3IuVNG9/r+3p9/BBEfLKcEQxbsqULSnvVY7hHRANwJnA20AusiYmVK\n6fmSaTOAE4q3U4AfFL9W36YH4YkFsKcVjmiCs+bDxPP7ZVNVVa+5Pwr6+tzk9twOpt+nhllWbNjJ\nvOWbadu7H4Cdb7cxb/lmgB4LfsWGndy4cgvX7L2bixrW0sD7AETQodyhUPi1KPhyDsucDGxLKb2c\nUnoPWArM7DRnJvBPqeBZYEREHFvlrIUnetXVsGcHkApfV11dGB/M6jX3R0Ffn5vcntvB9PvUOMtN\nq7e2F/sBbXv3c9PqrQf9uRUbdnLdP/+Ka/bezeyGNQyN99tLvXOxdzXWX8op99HAjpLl1uJYpXP6\n7okFsLet49jetsL4YFavuT8K+vrc5PbcDqbfp8ZZXn+7raLxA25avZW97ycualhbs+IuR01fUI2I\nuRHREhEtu3btqvwO9rRWNj5Y1Gvuj4K+Pje5PbeD6fepcZbjRgyvaPyAA+V/4FDMYFFOue8Eji9Z\nbiqOVTqHlNKilFJzSql51KhRlWYtHHOrZHywqNfcHwV9fW5ye24H
0+9T4yzXTTuR4Y0NHcaGNzZw\n3bQTD/pzB8p//yA7+bCcNOuAEyJiXER8DJgFrOw0ZyUwOwo+DexJKf2mylkLL6Y0dvpXtHF4YXww\nq9fcHwV9fW5ye24H0+9T4yznThnN3//lBEaPGE4Ao0cM5+//ckKPL6ZeN+1EGocE9+//LCkdfBsp\n0eOcaunxbJmU0r6IuBJYTeFUyHtSSlsi4vLi+ruAxyicBrmNwqmQl/ZL2gOvkg+WV/LLVa+5Pwr6\n+tzk9twOpt9nALKcO2V0xac+Hph/48qvwl46nC3TWS1PhYxUq39GOmlubk4tLS0Dsm1JqlcRsT6l\n1NzTvMF1kEiSVBWWuyRlyHKXpAxZ7pKUIctdkjJkuUtShix3ScqQ5S5JGRqwNzFFxC7g1QHZeO8c\nDbw50CEqVI+ZoT5zm7k26jEzVDf3f0op9fjhXANW7vUmIlrKeVfYYFKPmaE+c5u5NuoxMwxMbg/L\nSFKGLHdJypDlXr5FAx2gF+oxM9RnbjPXRj1mhgHI7TF3ScqQe+6SlCHLvRsRcVRE/Dwi/r349chu\n5o2IiGUR8WJEvBARp9Y6a0mWcjNvj4jNEbExIgb0Q/XLzVyc2xARGyLi0Vpm7CZLj7kjYlhE/DIi\nfhURWyLivw1E1pI85WQ+PiKejIjni5m/PhBZS/KU+2f6noh4IyKeq3XGkgzTI2JrRGyLiBu6WB8R\ncVtx/aaI+LP+zGO5d+8G4ImU0gnAE8XlrtwKPJ5S+lNgEvBCjfJ1pdzMAFNTSpMHwWlllWT+OgP7\n+JYqJ/f/Az6bUpoETAamFy9DOVDKybwPuDaldBLwaeCKiDiphhk7K/fPx33A9FqF6iwiGoA7gRnA\nScAFXTxuM4ATire5wA/6NVRKyVsXN2ArcGzx+2OBrV3MOQJ4heJrFwN9Kydzcd124OiBzlth5iYK\nf7k/CzxaL7lL5v8B8G/AKfWSuTjvEeDsesgMjAWeG6CcpwKrS5bnAfM6zbkbuKCr360/bu65d++Y\n9MFFvv8DOKaLOeOAXcC9xcMFP4qIQ2uW8MPKyQyQgDURsT4i5tYmWrfKzfwPwPXQzcUpa6+s3MVD\nSRuBN4Cfp5T+tVYBu1DuYw1ARIwFpgB1k3kAjQZ2lCy3FscqnVM1PV4gO2cRsQb44y5W/W3pQkop\nRURXpxUNBf4MuCql9K8RcSuF/zZ+p+phi6qQGeCMlNLOiPgj4OcR8WJK6V+qnfWAvmaOiM8Db6SU\n1kfEmf2T8sOq8VinlPYDkyNiBPBwRIxPKfXbceEq/fkgIv4QeAj4Rkrpt9VN+aFtVSWzOvpIl3tK\n6b90ty4i/ndEHJtS+k1EHEthz6uzVqC1ZG9sGQc/ZtxnVchMSmln8esbEfEwcDLQb+VehcynA/81\nIs4BhgGHR8R/Tyl9qZ8iA9V5rEvu6+2IeJLCceF+K/dqZI6IRgrFfn9KaXk/RW1Xzcd5AO0Eji9Z\nbiqOVTqnajws072VwCXF7y+hcOyxg5TSfwA7IuLE4tBZwPO1idelHjNHxKERcdiB74E/px/Lpgzl\nPM7zUkpNKaWxwCxgbX8XexnKeaxHFffYiYjhwNnAizVL+GHlZA5gMfBCSumWGmbrTo+ZB4l1wAkR\nMS4iPkbhz+nKTnNWArOLZ818GthTcsip+gbixYd6uAEjKbyA9+/AGuCo4vhxwGMl8yYDLcAmYAVw\n5GDODPwJ8KvibQvwt/XwOJfMP5PB8YJqOY/1RGBD8c/Gc8D8Osh8BoXXZDYBG4u3cwZz5uLyEuA3\nwF4K/6P+6wHIeg7wa+ClA3+vgMuBy4vfB4Uzal4CNgPN/ZnHd6hKUoY8LCNJGbLcJSlDlrskZchy\nl6QMWe6SlCHLXZIyZLlLUoYsd0nK0P8HpNO+2aXsS7QAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f13dc052f60>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Boolean row masks as plain arrays, used to index the projected ndarray by class.\n",
    "is_normal = (y_train_analysis == 0).values\n",
    "is_attack = (y_train_analysis == 1).values\n",
    "\n",
    "# Explicit figure/axes so the plot carries its own title and axis labels.\n",
    "fig, ax = plt.subplots()\n",
    "ax.scatter(x_train_analysis[is_normal, 0], x_train_analysis[is_normal, 1], label='Normal')\n",
    "ax.scatter(x_train_analysis[is_attack, 0], x_train_analysis[is_attack, 1], label='Attack')\n",
    "ax.set(title='Training set: first two sparse PCA components',\n",
    "       xlabel='Component 1', ylabel='Component 2')\n",
    "# Trailing semicolon keeps the Legend repr out of the cell output.\n",
    "ax.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-06-19T22:07:34.231043Z",
     "start_time": "2017-06-19T22:07:34.228269Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 10-component sparse PCA applied to the features before classification.\n",
    "# random_state is fixed so repeated runs learn the same components.\n",
    "pca = SparsePCA(n_components=10, random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-06-19T22:07:28.642Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Classifiers evaluated without a scaling step.\n",
    "names_noscaling = [\"Decision Tree\", \"Random Forest\"]\n",
    "classifiers_noscaling = [\n",
    "                        DecisionTreeClassifier(),\n",
    "                        RandomForestClassifier()\n",
    "                        ]\n",
    "\n",
    "# Classifiers evaluated behind a StandardScaler.\n",
    "names_withscaling = [\"SVC\", \"Non - Linear SVM\", \"AdaBoost\", \"Naive Bayes\"]\n",
    "classifiers_withscaling = [SVC(), NuSVC(),\n",
    "                            AdaBoostClassifier(), GaussianNB()]\n",
    "\n",
    "# One result row per classifier; rows are accumulated in `scores`.\n",
    "score = collections.namedtuple(\"score\", [\"name\", \"valid_score\" ,\"test_score\", 'time_taken'])\n",
    "scores = []\n",
    "# Ten random 80/20 splits of the training set, seeded for repeatability.\n",
    "cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)\n",
    "\n",
    "# Fit the projection on the training data only, then apply that SAME projection to the\n",
    "# test data. Re-fitting on the test set would learn different components, so the test\n",
    "# features would no longer match the space the classifiers are trained in.\n",
    "# NOTE: x_train/x_test are overwritten here, so re-run the split cell before re-running this one.\n",
    "x_train = pca.fit_transform(x_train)\n",
    "x_test = pca.transform(x_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-06-19T22:07:28.645Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Classifier: Decision Tree\n",
      "Score: 0.4316004258339248 \n",
      "\n",
      "Classifier: Random Forest\n",
      "Score: 0.44087118523775726 \n",
      "\n",
      "Classifier: SVC\n",
      "Score: 0.33206174591909154 \n",
      "\n",
      "Classifier: Non - Linear SVM\n",
      "Score: 0.5549591909155429 \n",
      "\n",
      "Classifier: AdaBoost\n",
      "Score: 0.43727821149751595 \n",
      "\n",
      "Classifier: Naive Bayes\n",
      "Score: 0.5449343506032647 \n",
      "\n"
     ]
    }
   ],
   "source": [
    "def _evaluate(name, clf, scale):\n",
    "    \"\"\"Cross-validate one classifier on the training set and score it on the test set.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    name : str\n",
    "        Label printed and stored in the result row.\n",
    "    clf : estimator\n",
    "        Classifier to evaluate; it is fitted in place on the full training set.\n",
    "    scale : bool\n",
    "        When True, a StandardScaler is placed in front of the classifier.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    score\n",
    "        (name, mean cross-validation score, test accuracy, seconds taken).\n",
    "    \"\"\"\n",
    "    print(\"Classifier: {}\".format(name))\n",
    "    # Restart the clock for every classifier so time_taken is per-model, not cumulative.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    steps = [preprocessing.StandardScaler(), clf] if scale else [clf]\n",
    "    model = make_pipeline(*steps)\n",
    "\n",
    "    # cross_val_score clones the pipeline per split, so `model` itself stays unfitted here.\n",
    "    valid_score = cross_val_score(model, x_train, y_train, cv=cv)\n",
    "\n",
    "    # Final fit on the full training set; the scaler (if any) is fitted on training data\n",
    "    # only and re-applied to the test data by the pipeline.\n",
    "    model.fit(x_train, y_train)\n",
    "    y_pred = model.predict(x_test)\n",
    "    test_acc = metrics.accuracy_score(y_test, y_pred)\n",
    "\n",
    "    elapsed = time.perf_counter() - start_time\n",
    "    print(\"Score: {} \\n\".format(test_acc))\n",
    "    return score(name, valid_score.mean(), test_acc, elapsed)\n",
    "\n",
    "\n",
    "for name, clf in zip(names_noscaling, classifiers_noscaling):\n",
    "    scores.append(_evaluate(name, clf, scale=False))\n",
    "\n",
    "for name, clf in zip(names_withscaling, classifiers_withscaling):\n",
    "    scores.append(_evaluate(name, clf, scale=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-06-19T22:07:28.648Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>valid_score</th>\n",
       "      <th>test_score</th>\n",
       "      <th>time_taken</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Decision Tree</td>\n",
       "      <td>0.992586</td>\n",
       "      <td>0.431600</td>\n",
       "      <td>17.264962</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Random Forest</td>\n",
       "      <td>0.994693</td>\n",
       "      <td>0.440871</td>\n",
       "      <td>33.555924</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SVC</td>\n",
       "      <td>0.960417</td>\n",
       "      <td>0.332062</td>\n",
       "      <td>1244.475453</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Non - Linear SVM</td>\n",
       "      <td>0.903906</td>\n",
       "      <td>0.554959</td>\n",
       "      <td>10601.877003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AdaBoost</td>\n",
       "      <td>0.974352</td>\n",
       "      <td>0.437278</td>\n",
       "      <td>10704.173392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Naive Bayes</td>\n",
       "      <td>0.902552</td>\n",
       "      <td>0.544934</td>\n",
       "      <td>10705.082242</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               name  valid_score  test_score    time_taken\n",
       "0     Decision Tree     0.992586    0.431600     17.264962\n",
       "1     Random Forest     0.994693    0.440871     33.555924\n",
       "2               SVC     0.960417    0.332062   1244.475453\n",
       "3  Non - Linear SVM     0.903906    0.554959  10601.877003\n",
       "4          AdaBoost     0.974352    0.437278  10704.173392\n",
       "5       Naive Bayes     0.902552    0.544934  10705.082242"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the collected per-classifier results as a table (one row per `score` tuple).\n",
    "pd.DataFrame(scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-06-19T22:07:28.651Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Memory note: the Naive Bayes run used about 3.4 GB of RAM."
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda env:p3]",
   "language": "python",
   "name": "conda-env-p3-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
